In [1]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Clicca qui per nascondere/ mostrare il codice"></form>''')
Out[1]:
In [2]:
import datetime

print(datetime.datetime.today())
2020-10-22 12:21:34.157744
In [3]:
import pandas as pd
import numpy as np
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px


import warnings
warnings.filterwarnings('ignore')
In [4]:
url_r = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv"
data_region = pd.read_csv(url_r)
#print(data_region.dtypes)
#print(data_region.isnull().sum())
#print(data_region.shape)
#print(data_region.head())

url_p = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-province/dpc-covid19-ita-province.csv"
data_province = pd.read_csv(url_p)
#print(data_province.dtypes)
#print(data_province.isnull().sum())
#print(data_province.shape)
#print(data_province.head())

url_n = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv"
data_national = pd.read_csv(url_n)
#print(data_national.dtypes)
#print(data_national.isnull().sum())
#print(data_national.shape)
#print(data_national.head())
In [5]:
#add daily cases/deaths/recovered columns
data_national['new_cases'] = data_national['totale_casi'].diff()
data_national['new_deaths'] = data_national['deceduti'].diff()
data_national['new_recovered'] = data_national['dimessi_guariti'].diff()
data_national['new_swabs'] = data_national['tamponi'].diff()
data_national['new_unique_tested'] = data_national['casi_testati'].diff()
#add a day/day-1 percentage change for new_cases
data_national['daily_cases_perc_change'] = round((data_national['new_cases'].pct_change(1))*100,2)
data_national['daily_swab_perc_change'] = round((data_national['new_swabs'].pct_change(1))*100,2)
data_national['daily_unique_tested_perc_change'] = round((data_national['new_unique_tested'].pct_change(1))*100,2)
#detect ratio
data_national['detect_ratio_swabs'] = round((data_national['new_cases'] / data_national['new_swabs'])*100,2)
data_national['detect_ratio_cases'] = round((data_national['new_cases'] / data_national['new_unique_tested'])*100,2)
data_national.tail(10)
Out[5]:
data stato ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi dimessi_guariti ... new_cases new_deaths new_recovered new_swabs new_unique_tested daily_cases_perc_change daily_swab_perc_change daily_unique_tested_perc_change detect_ratio_swabs detect_ratio_cases
231 2020-10-12T17:00:00 ITA 4821 452 5273 77491 82764 3689 4619 240600 ... 4619.0 39.0 891.0 85442.0 59649.0 -15.34 -18.36 -13.18 5.41 7.74
232 2020-10-13T17:00:00 ITA 5076 514 5590 81603 87193 4429 5901 242028 ... 5898.0 41.0 1428.0 112544.0 70260.0 27.69 31.72 17.79 5.24 8.39
233 2020-10-14T17:00:00 ITA 5470 539 6009 86436 92445 5252 7332 244065 ... 7332.0 43.0 2037.0 152196.0 87601.0 24.31 35.23 24.68 4.82 8.37
234 2020-10-15T17:00:00 ITA 5796 586 6382 92884 99266 6821 8804 245964 ... 8803.0 83.0 1899.0 162932.0 98542.0 20.06 7.05 12.49 5.40 8.93
235 2020-10-16T17:00:00 ITA 6178 638 6816 100496 107312 8046 10010 247872 ... 10009.0 55.0 1908.0 150377.0 92402.0 13.70 -7.71 -6.23 6.66 10.83
236 2020-10-17T17:00:00 ITA 6617 705 7322 109613 116935 9623 10925 249127 ... 10925.0 47.0 1255.0 165837.0 103326.0 9.15 10.28 11.82 6.59 10.57
237 2020-10-18T17:00:00 ITA 7131 750 7881 118356 126237 9302 11705 251461 ... 11705.0 69.0 2334.0 146541.0 95554.0 7.14 -11.64 -7.52 7.99 12.25
238 2020-10-19T17:00:00 ITA 7676 797 8473 125530 134003 7766 9338 252959 ... 9337.0 73.0 1498.0 98862.0 65824.0 -20.23 -32.54 -31.11 9.44 14.18
239 2020-10-20T17:00:00 ITA 8454 870 9324 133415 142739 8736 10874 255005 ... 10871.0 89.0 2046.0 144737.0 87680.0 16.43 46.40 33.20 7.51 12.40
240 2020-10-21T17:00:00 ITA 9057 926 9983 145459 155442 12703 15199 257374 ... 15199.0 127.0 2369.0 177848.0 106488.0 39.81 22.88 21.45 8.55 14.27

10 rows × 27 columns

In [6]:
#regional data preparation

data_region_Abruzzo = data_region[(data_region['denominazione_regione'] == 'Abruzzo')]
data_region_Basilicata = data_region[(data_region['denominazione_regione'] == 'Basilicata')]
data_region_Bolzano = data_region[(data_region['denominazione_regione'] == 'P.A. Bolzano')]
data_region_Calabria = data_region[(data_region['denominazione_regione'] == 'Calabria')]
data_region_Campania = data_region[(data_region['denominazione_regione'] == 'Campania')]
data_region_EmiliaR = data_region[(data_region['denominazione_regione'] == 'Emilia-Romagna')]
data_region_Friuli = data_region[(data_region['denominazione_regione'] == 'Friuli Venezia Giulia')]
data_region_Lazio = data_region[(data_region['denominazione_regione'] == 'Lazio')]
data_region_Liguria = data_region[(data_region['denominazione_regione'] == 'Liguria')]
data_region_Lombardia = data_region[(data_region['denominazione_regione'] == 'Lombardia')]
data_region_Marche = data_region[(data_region['denominazione_regione'] == 'Marche')]
data_region_Molise = data_region[(data_region['denominazione_regione'] == 'Molise')]
data_region_Piemonte = data_region[(data_region['denominazione_regione'] == 'Piemonte')]
data_region_Puglia = data_region[(data_region['denominazione_regione'] == 'Puglia')]
data_region_Sardegna = data_region[(data_region['denominazione_regione'] == 'Sardegna')]
data_region_Sicilia = data_region[(data_region['denominazione_regione'] == 'Sicilia')]
data_region_Toscana = data_region[(data_region['denominazione_regione'] == 'Toscana')]
data_region_Trento = data_region[(data_region['denominazione_regione'] == 'P.A. Trento')]
data_region_Umbria = data_region[(data_region['denominazione_regione'] == 'Umbria')]
data_region_VAosta = data_region[(data_region['denominazione_regione'] == "Valle d'Aosta")]
data_region_Veneto = data_region[(data_region['denominazione_regione'] == 'Veneto')]

def region_apply(region):
    for x in region:
        x['new_cases'] =  x['totale_casi'].diff()
        x['new_deaths'] = x['deceduti'].diff()
        x['new_recovered'] = x['dimessi_guariti'].diff()
        x['new_swabs'] = x['tamponi'].diff()
        #add a day/day-1 percentage change for new_cases
        x['daily_cases_perc_change'] = round((x['new_cases'].pct_change(1))*100,2)
        x['daily_swab_perc_change'] = round((x['new_swabs'].pct_change(1))*100,2)
        #detect ratio
        x['detect_ratio'] = round((x['new_cases'] / x['new_swabs'])*100,2)
        return; 

region_apply([data_region_Abruzzo])  
region_apply([data_region_Basilicata]) 
region_apply([data_region_Bolzano])
region_apply([data_region_Calabria])
region_apply([data_region_Campania])
region_apply([data_region_EmiliaR])
region_apply([data_region_Friuli])
region_apply([data_region_Lazio])
region_apply([data_region_Liguria])
region_apply([data_region_Lombardia])
region_apply([data_region_Marche])
region_apply([data_region_Molise])
region_apply([data_region_Piemonte])
region_apply([data_region_Puglia])
region_apply([data_region_Sardegna])
region_apply([data_region_Sicilia])
region_apply([data_region_Toscana])
region_apply([data_region_Trento])
region_apply([data_region_VAosta])
region_apply([data_region_Veneto])
In [7]:
data_region_Nordovest = data_region[(data_region.denominazione_regione.isin(['Piemonte', 'Lombardia', 'Liguria',"Valle d'Aosta"]))]
data_region_Nordest = data_region[(data_region.denominazione_regione.isin(['Emilia-Romagna', 'P.A. Bolzano', 'P.A. Trento', 'Veneto', 'Friuli Venezia Giulia']))]
data_region_Centro = data_region[(data_region.denominazione_regione.isin(['Toscana', 'Umbria', 'Marche', 'Lazio']))]
data_region_Sudisole = data_region[(data_region.denominazione_regione.isin(['Abruzzo', 'Molise', 'Campania', 'Puglia', 'Basilicata', 'Calabria', 'Sicilia', 'Sardegna']))]

cases_Nordovest = data_region_Nordovest.groupby('data').sum()
region_apply([cases_Nordovest])  
cases_Nordovest['data'] = cases_Nordovest.index

cases_Nordest = data_region_Nordest.groupby('data').sum()
region_apply([cases_Nordest])  
cases_Nordest['data'] = cases_Nordest.index

cases_Centro = data_region_Centro.groupby('data').sum()
region_apply([cases_Centro])  
cases_Centro['data'] = cases_Centro.index

cases_Sudisole = data_region_Sudisole.groupby('data').sum()
region_apply([cases_Sudisole])  
cases_Sudisole['data'] = cases_Sudisole.index

cases_Nordovest.tail(5)
Out[7]:
codice_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare totale_positivi variazione_totale_positivi nuovi_positivi ... tamponi casi_testati new_cases new_deaths new_recovered new_swabs daily_cases_perc_change daily_swab_perc_change detect_ratio data
data
2020-10-17T17:00:00 13 180.689065 33.123883 2025 187 2212 32309 34521 3712 4178 ... 3726942 2263052.0 4178.0 16.0 450.0 45713.0 8.46 0.58 9.14 2020-10-17T17:00:00
2020-10-18T17:00:00 13 180.689065 33.123883 2267 206 2473 34941 37414 2893 4541 ... 3771332 2291990.0 4541.0 26.0 1622.0 44390.0 8.69 -2.89 10.23 2020-10-18T17:00:00
2020-10-19T17:00:00 13 180.689065 33.123883 2481 212 2693 37117 39810 2396 3078 ... 3797797 2309178.0 3078.0 16.0 666.0 26465.0 -32.22 -40.38 11.63 2020-10-19T17:00:00
2020-10-20T17:00:00 13 180.689065 33.123883 2846 222 3068 40327 43395 3585 4401 ... 3839186 2334090.0 4401.0 31.0 785.0 41389.0 42.98 56.39 10.63 2020-10-20T17:00:00
2020-10-21T17:00:00 13 180.689065 33.123883 3221 244 3465 45312 48777 5382 6581 ... 3895718 2369221.0 6581.0 36.0 1163.0 56532.0 49.53 36.59 11.64 2020-10-21T17:00:00

5 rows × 25 columns

In [8]:
fig2 = px.bar(data_national, x='data', y='totale_casi',
             hover_data=['totale_casi'], color='totale_casi',
             height=600, color_continuous_scale='Sunsetdark')

fig2.update_layout(title_text='Total COVID19 Cases - Italy',
                  xaxis_rangeslider_visible=True)
fig2.update_yaxes(tick0=0, dtick=25000,  gridcolor='White')
fig2.show()
In [9]:
fig22 = px.bar(data_national, x='data', y='totale_positivi',
             hover_data=['totale_positivi'], color='totale_positivi',
             height=600, color_continuous_scale='Sunsetdark')

fig22.update_layout(title_text='Active COVID19 Cases - Italy',
                  xaxis_rangeslider_visible=True)
fig22.update_yaxes(tick0=0, dtick=10000,  gridcolor='White')
fig22.show()
In [10]:
fig = go.Figure()

fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordovest['data'], y=cases_Nordovest['new_cases'], name="North-West",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordest['data'], y=cases_Nordest['new_cases'], name="North-East",
                         line_color='green'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Centro['data'], y=cases_Centro['new_cases'], name="Center",
                         line_color='darkviolet'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Sudisole['data'], y=cases_Sudisole['new_cases'], name="South and Islands",
                         line_color='darkblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="All Italy",
                         line_color='deepskyblue'))

fig.update_layout(title_text='Daily Coronavirus new cases - All Italy and Regions',
                  xaxis_rangeslider_visible=True)


fig.show()
In [11]:
fig = go.Figure()

fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordovest['data'], y=cases_Nordovest['new_swabs'], name="North-West",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Nordest['data'], y=cases_Nordest['new_swabs'], name="North-East",
                         line_color='green'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Centro['data'], y=cases_Centro['new_swabs'], name="Center",
                         line_color='darkviolet'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=cases_Sudisole['data'], y=cases_Sudisole['new_swabs'], name="South and Islands",
                         line_color='darkblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_swabs'], name="All Italy",
                         line_color='deepskyblue'))

fig.update_layout(title_text='Daily swabs - All Italy and Regions',
                  xaxis_rangeslider_visible=True)


fig.show()
In [12]:
fig = go.Figure()


fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily Deaths",
                         line_color='red'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_recovered'], name="Daily Recovered",
                         line_color='green'))



fig.update_layout(title_text='Daily Coronavirus Deaths and Recoveries - Italy',
                  xaxis_rangeslider_visible=True)

fig.update_yaxes(tick0=0, dtick=500)

fig.show()
In [13]:
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="Daily Cases",
                         line_color='deepskyblue'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_swabs'], name="Daily swabs",
                         line_color='purple'))
fig.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_unique_tested'], name="Daily unique tested",
                         line_color='red'))
fig.update_layout(title_text='Daily Coronavirus new cases and swabs - Italy',
                  xaxis_rangeslider_visible=True)

fig.update_yaxes(tick0=0, dtick=10000)

fig.show()
In [14]:
fig3 = go.Figure()

fig3.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['detect_ratio_swabs'], name="Daily detect ratio - Italy",
                         line_color='purple'))
fig3.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['detect_ratio_cases'], name="Daily unique detect ratio - Italy",
                         line_color='red'))


fig3.update_layout(title_text="Daily Swabs detect ratio - Italy",
                  xaxis_rangeslider_visible=True)
fig3.update_yaxes(dtick=5)
In [15]:
fig4 = go.Figure()

#fig4.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
#                         line_color='blue'))
fig4.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['totale_ospedalizzati'], name="Daily total Hospital - Italy",
                         line_color='green'))

fig4.update_layout(title_text="Daily Total Hospital - Italy",
                  xaxis_rangeslider_visible=True)
fig4.update_yaxes(dtick=2000)
In [16]:
fig5 = go.Figure()

fig5.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
                         line_color='blue'))

fig5.update_layout(title_text="Daily Total UTI - Italy",
                  xaxis_rangeslider_visible=True)
fig5.update_yaxes(dtick=200)
In [17]:
fig6 = go.Figure()

fig6.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['daily_cases_perc_change'], name="Daily cases percentual change - Italy",
                         line_color='purple'))
fig6.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['daily_swab_perc_change'], name="Daily swab percentual change - Italy",
                         line_color='red'))


fig6.update_layout(title_text="Daily v- Italy",
                  xaxis_rangeslider_visible=True)
fig6.update_yaxes(dtick=40)
In [18]:
fig7 = go.Figure()

fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_recovered'], name="Daily new recovered - Italy",
                         line_color='purple'))
fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily new deaths - Italy",
                         line_color='red'))
fig7.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_cases'], name="Daily new cases - Italy",
                         line_color='green'))

fig7.update_layout(title_text="Daily change - Italy",
                  xaxis_rangeslider_visible=True)
fig7.update_yaxes(dtick=500)
In [19]:
fig8 = go.Figure()

fig8.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['terapia_intensiva'], name="Daily total UTI - Italy",
                         line_color='purple'))
fig8.add_trace(go.Scatter(mode = "lines+markers", x=data_national['data'], y=data_national['new_deaths'], name="Daily new deaths - Italy",
                         line_color='red'))


fig8.update_layout(title_text="Daily UTI vs  Death - Italy",
                  xaxis_rangeslider_visible=True)
fig8.update_yaxes(dtick=200)
In [20]:
data_ge=data_province[data_province['sigla_provincia']=='GE']
#print(data_ge)
fig9 = go.Figure()

fig9.add_trace(go.Scatter(mode = "lines+markers", x=data_ge['data'], y=data_ge['totale_casi'], name="Daily cases GE - Italy",
                         line_color='red'))



fig9.update_layout(title_text="Daily cases GE - Italy",
                  xaxis_rangeslider_visible=True)
fig9.update_yaxes(dtick=1000)
In [21]:
fig10 = go.Figure()

fig10.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['totale_casi'], name="Daily cases Liguria - Italy",
                         line_color='red'))



fig10.update_layout(title_text="Daily cases Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig10.update_yaxes(dtick=1000)
In [22]:
fig11 = go.Figure()

fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['terapia_intensiva'], name="Daily UTI Liguria - Italy",
                         line_color='red'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['ricoverati_con_sintomi'], name="Daily hospital Liguria - Italy",
                         line_color='purple'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_deaths'], name="Daily new deaths Liguria - Italy",
                         line_color='green'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['nuovi_positivi'], name="Daily new cases Liguria - Italy",
                         line_color='blue'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_swabs'], name="Daily new swabs Liguria - Italy",
#line_color='black'))
fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_recovered'], name="Daily new recovered Liguria - Italy",
                         line_color='yellow'))

fig11.update_layout(title_text="Daily change Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig11.update_yaxes(dtick=100)
In [23]:
fig12 = go.Figure()

#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_cases_perc_change'], name="Daily UTI Liguria - Italy",
#                         line_color='red'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_swab_perc_change'], name="Daily hospital Liguria - Italy",
#                         line_color='purple'))
fig12.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['detect_ratio'], name="Daily detect  ratio Liguria - Italy",
                         line_color='green'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['nuovi_positivi'], name="Daily new cases Liguria - Italy",
#                         line_color='blue'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_swabs'], name="Daily new swabs Liguria - Italy",
#line_color='black'))
#fig11.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['new_recovered'], name="Daily new recovered Liguria - Italy",
#                         line_color='yellow'))

fig12.update_layout(title_text="Daily detect ratio Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig12.update_yaxes(dtick=20)
In [24]:
fig13 = go.Figure()

fig13.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_cases_perc_change'], name="Daily cases perc change Liguria - Italy",
                         line_color='red'))
fig13.add_trace(go.Scatter(mode = "lines+markers", x=data_region_Liguria['data'], y=data_region_Liguria['daily_swab_perc_change'], name="Daily swab perc change Liguria - Italy",
                         line_color='purple'))


fig13.update_layout(title_text="Daily percentual change Liguria - Italy",
                  xaxis_rangeslider_visible=True)
fig13.update_yaxes(dtick=100)
In [25]:
print(data_national.dtypes)
data                                object
stato                               object
ricoverati_con_sintomi               int64
terapia_intensiva                    int64
totale_ospedalizzati                 int64
isolamento_domiciliare               int64
totale_positivi                      int64
variazione_totale_positivi           int64
nuovi_positivi                       int64
dimessi_guariti                      int64
deceduti                             int64
casi_da_sospetto_diagnostico       float64
casi_da_screening                  float64
totale_casi                          int64
tamponi                              int64
casi_testati                       float64
note                                object
new_cases                          float64
new_deaths                         float64
new_recovered                      float64
new_swabs                          float64
new_unique_tested                  float64
daily_cases_perc_change            float64
daily_swab_perc_change             float64
daily_unique_tested_perc_change    float64
detect_ratio_swabs                 float64
detect_ratio_cases                 float64
dtype: object
In [ ]:
 
In [ ]:
 
In [26]:
import pandas as pd
import numpy as np
import datetime
import requests
import warnings

import matplotlib.pyplot as plt
import matplotlib
import matplotlib.dates as mdates
import seaborn as sns
import plotly.offline as py


from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OrdinalEncoder
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima_model import ARIMA
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, add_changepoints_to_plot

from IPython.display import Image
warnings.filterwarnings('ignore')
%matplotlib inline
In [27]:
datas = pd.DataFrame(columns = ['ds','y'])
datas['ds'] = data_national['data']
datas['y'] = data_national['new_cases']

prop=Prophet()
prop.fit(datas)
future=prop.make_future_dataframe(periods=3)
prop_forecast=prop.predict(future)
forecast = prop_forecast[['ds','yhat']].tail(30)

fig = plot_plotly(prop, prop_forecast)
fig = prop.plot(prop_forecast,xlabel='Date',ylabel='Confirmed Cases')
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
In [28]:
def fit_predict_model(dataframe, interval_width = 0.99, changepoint_range = 0.8):
    m = Prophet(daily_seasonality = False, yearly_seasonality = False, weekly_seasonality = False)
    m = m.fit(dataframe)
    
    forecast = m.predict(dataframe)
    forecast['fact'] = dataframe['y'].reset_index(drop = True)
    print('Displaying Prophet plot')
    fig1 = m.plot(forecast)
    m.plot_components(forecast)
    return forecast
    
pred = fit_predict_model(datas)
Displaying Prophet plot
In [29]:
def detect_anomalies(forecast):
    forecasted = forecast[['ds','trend', 'yhat', 'yhat_lower', 'yhat_upper', 'fact']].copy()
    #forecast['fact'] = df['y']

    forecasted['anomaly'] = 0
    forecasted.loc[forecasted['fact'] > forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] < forecasted['yhat_lower'], 'anomaly'] = -1

    #anomaly importances
    forecasted['importance'] = 0
    forecasted.loc[forecasted['anomaly'] ==1, 'importance'] = \
        (forecasted['fact'] - forecasted['yhat_upper'])/forecast['fact']
    forecasted.loc[forecasted['anomaly'] ==-1, 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['fact'])/forecast['fact']
    
    return forecasted

pred = detect_anomalies(pred)
pred.head()
Out[29]:
ds trend yhat yhat_lower yhat_upper fact anomaly importance
0 2020-02-24 18:00:00 2342.848868 2342.848868 492.397974 4261.755885 NaN 0 0.000000
1 2020-02-25 18:00:00 2366.711131 2366.711131 479.327224 4141.199359 93.0 -1 4.154056
2 2020-02-26 18:00:00 2390.573393 2390.573393 497.380156 4163.678715 78.0 -1 5.376669
3 2020-02-27 18:00:00 2414.435655 2414.435655 713.643152 4242.426976 250.0 -1 1.854573
4 2020-02-28 18:00:00 2438.297918 2438.297918 645.008825 4520.114671 238.0 -1 1.710121
In [30]:
future = prop.make_future_dataframe(periods=1)
future.tail()
forecast1 = prop.predict(future)
forecast1[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
Out[30]:
ds yhat yhat_lower yhat_upper
237 2020-10-18 17:00:00 6400.687778 4928.142903 7839.152609
238 2020-10-19 17:00:00 6164.321888 4697.468650 7720.443957
239 2020-10-20 17:00:00 6396.848007 4828.602763 7949.210704
240 2020-10-21 17:00:00 6775.069592 5285.167124 8258.244208
241 2020-10-22 17:00:00 6841.910101 5383.362583 8433.311808
In [ ]: